* Folder holding the decomposition-sample inputs. $BS_fold is not set in this
* section. The value already ends in "/", so the later
* "$workingnew/ProdR_new_kl_sampleSS" reference expands with a doubled slash.
global workingnew = "$BS_fold/local1/inputs/"
******************************************************************************
********************************** Full Sample *******************************
******************************************************************************
* Build the firm-year "full sample": one row per (co_code1, year) holding
* log revenue (r), log labour (l), log capital (k), the count of non-missing
* energy_intensity rows (energy_obs) and 0/1 flags for whether any quantity /
* energy information was reported.
use "$clean/prowess_wits_IO.dta", clear
rename products_cocode1 co_code1

* Slice the date string by character position: 1-4, 5-6 and 7-8.
generate date_string = prod_date3
generate year2  = substr(date_string,1,4)
generate month2  = substr(date_string,5,2)
generate day2  = substr(date_string,7,2)
keep if ann_rep_months4==12 // Keep firms reporting data for 12 months.
* Keep rows whose date string has "03" in positions 5-6
* (presumably March year-ends -- confirm against the date format).
keep if month2=="03"

* NOTE(review): year2 is generated above, but `year` is the variable that is
* destrung and used as the merge key; presumably `year` already exists in the
* input file -- confirm.
destring year, replace

* Attach the perpetual-inventory capital file; rows found only in the using
* file are discarded.
merge m:1 co_code1 year using "$working/perpetual_inventory_capital"
drop if _merge==2
drop _merge
keep co_code1 year nic_08_2dig sales_value sales_quantity micro_K sa_compensation_to_e_160 energy_cons_quantity  energy_intensity

* Restrict to the listed 2-digit NIC-08 codes.
keep if nic_08_2dig =="10" | nic_08_2dig =="13" | nic_08_2dig =="17" | nic_08_2dig =="20" | nic_08_2dig =="21"  | nic_08_2dig =="22" | nic_08_2dig =="23" ///
 | nic_08_2dig =="24" | nic_08_2dig =="25" | nic_08_2dig =="26"| nic_08_2dig =="27" | nic_08_2dig =="28" | nic_08_2dig =="29"

* Attach the yearly wholesale price index used as deflator; rows found only
* in the using file are discarded.
merge m:1 year using "$working/wholesale_priceindex"
drop if _merge==2
drop _merge
*Instead of 100, make base year 1
replace wholesale_priceindex = wholesale_priceindex/100
generate k = log(micro_K)
generate l= log(sa_compensation_to_e_160/wholesale_priceindex)
drop if missing(l) | missing(k) | missing(sales_value) | sales_value == 0

generate REV = sales_value/wholesale_priceindex
* ln() returns missing for missing or non-positive quantities, so the
* (count) below only counts rows with a usable quantity.
gen q_s = ln(sales_quantity)

* Collapse to firm-year: total deflated revenue, mean l and k, and the number
* of rows with non-missing q_s / energy_intensity.
collapse (sum) REV (mean) l k  (count) q_s energy_obs = energy_intensity, by(co_code1 year)

* Variable names are written out in full here. The previous `q` and `e` only
* worked through Stata's variable-name abbreviation (resolving to q_s and
* energy_obs respectively) and fail under -set varabbrev off-.
gen reported_q = q_s>0
gen reported_e = energy_obs>0
generate r = ln(REV)
drop REV
keep co_code1 year r l k energy_obs reported_q reported_e 

* ---------------------------------------------------------------------------
* Helper: write one row of summary statistics for a (sub)sample.
*   [if]      optional filter selecting the subsample (all rows when omitted)
*   sample()  text stored in the string variable `sample`
*   saving()  path of the .dta file to write (overwritten if it exists)
* The row holds the means of r, k and l, the number of non-missing r (obs),
* and the number of firm-years without energy / quantity information.
* The data in memory are left untouched (preserve/restore).
* ---------------------------------------------------------------------------
capture program drop _save_sample_stats
program define _save_sample_stats
	syntax [if], sample(string) saving(string)
	preserve
	if `"`if'"' != "" {
		keep `if'
	}
	collapse (mean) r k l (count) obs = r (sum) reported_q reported_e 
	* reported_* are 0/1 flags, so obs minus their sum is the number of
	* firm-years with nothing reported.
	gen missing_energy_obs = obs - reported_e
	gen missing_q_obs = obs - reported_q
	gen sample = `"`sample'"'
	save `"`saving'"', replace
	restore
end

* Full sample: every firm-year currently in memory.
_save_sample_stats, sample("Full Sample") saving("$working/temp/full_sample_stats")

* Flag which full-sample firm-years also appear in the estimation sample.
* NOTE(review): rows found only in the using file (_merge == 2) are not
* dropped; they stay in memory with an empty full_vs_estimation and flow into
* the decomposition comparisons below -- confirm this is intended.
merge m:1 co_code1 year using "$working/estimation_sample.dta"
gen full_vs_estimation = "full_only" if _merge == 1
replace full_vs_estimation = "merged" if _merge == 3
drop _merge

_save_sample_stats if full_vs_estimation == "merged", ///
	sample("Estimation Sample") ///
	saving("$working/temp/estimation_sample_stats")

* The "exclusing" spelling in this file name is kept on purpose: the same
* name is read back when the table is assembled.
_save_sample_stats if full_vs_estimation == "full_only", ///
	sample("Full excluding Estimation Sample") ///
	saving("$working/temp/full_exclusing_estimation_sample_stats")

* Flag membership in the decomposition sample and reduce the data to the
* distinct combinations of the listed variables (contract, then drop _freq).
* NOTE(review): `e` is resolved by Stata at run time -- either a variable
* literally named e brought in by one of the merges, or an abbreviation of
* another variable starting with "e"; confirm which and spell it out.
merge m:1 co_code1 year using "$workingnew/ProdR_new_kl_sampleSS"
contract co_code1 year r k l e _merge full_vs_estimation reported_q reported_e 
drop _freq
rename _merge decomposition_sample_merge
save "$working/temp/full_sample", replace

_save_sample_stats if decomposition_sample_merge == 3, ///
	sample("Decomposition Sample") ///
	saving("$working/temp/decomposition_sample_stats")

_save_sample_stats if full_vs_estimation == "merged" & decomposition_sample_merge == 1, ///
	sample("Estimation excluding Decomposition Sample") ///
	saving("$working/temp/estimation_excluding_decomposition_sample_stats")

_save_sample_stats if decomposition_sample_merge == 1, ///
	sample("Full excluding Decomposition Sample") ///
	saving("$working/temp/full_excluding_decomposition_sample_stats")

****
* Stack the six one-row summary files into a single table, starting from the
* full-sample row and appending the others in the order they should appear.
* ("exclusing" matches the spelling of the file saved earlier in this script.)
use "$working/temp/full_sample_stats", clear
foreach stem in estimation_sample_stats ///
	decomposition_sample_stats ///
	full_exclusing_estimation_sample_stats ///
	full_excluding_decomposition_sample_stats ///
	estimation_excluding_decomposition_sample_stats {
	append using "$working/temp/`stem'"
}

* Column order used for the exported table.
order sample obs missing_energy_obs missing_q_obs r k l

* Variable labels (used as column headers through the varlab option below).
label variable obs "Obs."
label variable missing_energy_obs "Missing Energy Obs."
label variable missing_q_obs "Missing Quantity Obs."
label variable r "ln(sales)"
label variable k "ln(capital)"
label variable l "ln(labour)"

* Show the three means with three decimals.
format %9.3f r k l

* Write the table as a LaTeX fragment.
texsave sample obs missing_energy_obs missing_q_obs r k l using "$tables/sample_comparisons.tex", ///
	footnote("Notes: Column (1) reports the total number of observations across all three samples. Columns (2) and (3) indicate the number of observations in each sample that lack energy and quantity information, respectively. Columns (4)-(6) present the average sales, capital, and labor for each sample. These values are expressed as logarithms of millions of 2010 Indian rupees.") ///
	title("Firm size comparisons across samples") ///
	nofix hlines(3) autonumber width(1.006\textwidth) replace location(h)  varlab marker(tab:comparisons) frag  
